Data preprocess for Bombus of Canada spatial analysis.

# Load libraries.
library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(spatstat)
## Loading required package: spatstat.data
## Loading required package: spatstat.univar
## spatstat.univar 3.1-2
## Loading required package: spatstat.geom
## spatstat.geom 3.3-6
## Loading required package: spatstat.random
## spatstat.random 3.3-3
## Loading required package: spatstat.explore
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
## 
##     collapse
## spatstat.explore 3.4-2
## Loading required package: spatstat.model
## Loading required package: rpart
## spatstat.model 3.3-5
## Loading required package: spatstat.linnet
## spatstat.linnet 3.2-5
## 
## spatstat 3.3-2 
## For an introduction to spatstat, type 'beginner'
library(sp)
library(sf)
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
library(spatstat.geom)
library(RColorBrewer)

Read dataset

1. Read Bombus occurrence data.

# Read occurrence data
bombus_data <- read_tsv("../data/0008996-250402121839773/occurrence.txt")
## Rows: 16088 Columns: 223
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr   (41): license, institutionCode, collectionCode, basisOfRecord, recorde...
## dbl   (18): gbifID, catalogNumber, startDayOfYear, endDayOfYear, year, month...
## lgl  (161): accessRights, bibliographicCitation, language, modified, publish...
## dttm   (3): lastInterpreted, lastParsed, lastCrawled
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Check data.
dim(bombus_data)
## [1] 16088   223

Filter to entries with coordinates and genus Bombus:

bombus_filtered <- bombus_data %>%
  filter(!is.na(decimalLatitude), !is.na(decimalLongitude), genus == "Bombus")

Removed 282 records without coordinates and not belong to Bombus.

Totally, there are 15806 bombus records.

Check specificEpithet:

Definition – Specific Epithet

In biological taxonomy, a specific epithet is the second part of the scientific name (binomial name) of a species. It identifies the species within a given genus.

unique_specific_epithet_all <- unique(bombus_filtered$specificEpithet)

There are 40 specific species in North America.

2. Read BC Covariates.

load('../data/BC_Covariates.Rda')

data_covariates <- DATA

3. Read BC Park

load('../data/BC_Parks.Rda')
data_parks <- DATA

# Create the parks ppp object
ppp_parks <- ppp(
  x = data_parks$Parks$X, 
  y = data_parks$Parks$Y, 
  window = as.owin(data_parks$Window))

# Add region information as marks
marks(ppp_parks) <- data_parks$Parks$Region

Explore the Covariate Data

sapply(data_covariates, class)
##            Window         Elevation            Forest               HFI 
## "SpatialPolygons"              "im"              "im"              "im" 
##        Dist_Water 
##              "im"
summary(data_covariates)
##            Length Class           Mode
## Window      1     SpatialPolygons S4  
## Elevation  10     im              list
## Forest     10     im              list
## HFI        10     im              list
## Dist_Water 10     im              list

Plot the Window and Image Class Objects

par(mfrow = c(3,2))
plot(data_covariates$Window)
plot(data_covariates$Elevation)
plot(data_covariates$Forest)
plot(data_covariates$HFI)
plot(data_covariates$Dist_Water)

Filter Bumble Bee in BC Province

bombus_bc <- subset(bombus_filtered, stateProvince == "British Columbia")

# View(bombus_bc)
# 3185 223
dim(bombus_bc)
## [1] 3185  223
bumble_bc_percent <- (dim(bombus_bc)[1] / dim(bombus_filtered)[1]) * 100
print(paste(bumble_bc_percent, "% of bumble bee sightings across Canada occurred in BC."))
## [1] "20.1505757307352 % of bumble bee sightings across Canada occurred in BC."

There are 3185 bumble bee occurrences in BC provinces. We removed 12621 records.

Check specificEpithet:

unique_specific_epithet_bc <- unique(bombus_bc$specificEpithet)

There are 30 specific species the province of British Columbia, accounting for 75 % of the total found in North America.

Plot BC province window

# Convert the window to an owin object
sf_obj <- st_as_sf(data_covariates$Window)
window <- as.owin(sf_obj)
plot(window, main="BC Province Window")

Plot bombus bc dataset

# Check dataset columns.
# colnames(bombus_bc)
# Convert the cleaned data frame to an sf object
data_sf <- st_as_sf(bombus_bc, coords = c("decimalLongitude", "decimalLatitude"), crs = 4326)

# Define the target projection
projected_args <- "+proj=aea +lat_0=45 +lon_0=-126 +lat_1=50 +lat_2=58.5 +x_0=1000000 +y_0=0 +datum=NAD83 +units=m +no_defs"

# Transform the coordinates
bombus_bc_projected <- st_transform(data_sf, crs = projected_args)
# View(bombus_bc_projected)
dim(bombus_bc_projected)
## [1] 3185  222
# Get coordinations
bombus_bc_projected_coords <- st_coordinates(bombus_bc_projected)
head(bombus_bc_projected_coords)
##              X         Y
## [1,] 1467661.3  470452.3
## [2,] 1467661.3  470452.3
## [3,] 1176146.2  399333.2
## [4,]  846420.5 1069165.6
## [5,]  829971.4 1060433.6
## [6,]  846420.5 1069165.6
# Convert to ppp object.
ppp_bombus <- ppp(
  x = bombus_bc_projected_coords[, "X"],
  y = bombus_bc_projected_coords[, "Y"],
  window = window)
## Warning: 62 points were rejected as lying outside the specified window
## Warning: data contain duplicated points
# Plot bombus data
plot(ppp_bombus,  main = 'Bumble Bee in BC Province')
## Warning in plot.ppp(ppp_bombus, main = "Bumble Bee in BC Province"): 62 illegal
## points also plotted

Remove points outside BC province and remove duplicates.

From the above plot warning, we can see there are 62 points outside the BC province window. There are also duplicates in our dataset.

# Filter out points outside the window
inside_window <- bombus_bc_projected_coords[, "X"] >= window$xrange[1] & 
                 bombus_bc_projected_coords[, "X"] <= window$xrange[2] &
                 bombus_bc_projected_coords[, "Y"] >= window$yrange[1] &
                 bombus_bc_projected_coords[, "Y"] <= window$yrange[2]

marks_df <- data.frame(
  specificEpithet = bombus_bc_projected$specificEpithet[inside_window],
  locality        = bombus_bc_projected$locality[inside_window]
)

# Convert to ppp object
ppp_bombus <- ppp(x = bombus_bc_projected_coords[inside_window, "X"],
                 y = bombus_bc_projected_coords[inside_window, "Y"],
                 marks  = marks_df,
                 window = window)
## Warning: 46 points were rejected as lying outside the specified window
## Warning: data contain duplicated points

Remove duplicates.

# Remove duplicates
ppp_bombus <- ppp_bombus[!duplicated(ppp_bombus), ]

There are 2201 duplicated points. Now, there are 984 unique bomble bee datapoints left in BC province.

plot(unmark(ppp_bombus), main = "Bumble Bee Occurrences in BC")

Plot Specific Epithet vs. Bumble Bee Occurrences

# Extract levels and number of classes
epithet_levels <- unique(marks(ppp_bombus)$specificEpithet)
n_classes <- length(epithet_levels)

# Define color palette
colors <- colorRampPalette(brewer.pal(12, "Set3"))(n_classes)

# Define shape values (repeats if n_classes > 26)
all_shapes <- 0:25
shape_values <- rep(all_shapes, length.out = n_classes)

# Plot
plot(ppp_bombus,
     which.marks = "specificEpithet",
     main        = "Specific Epithet of Bumble Bee in BC",
     cols         = colors,
     pch         = shape_values,
     cex         = 0.8,
     legend      = FALSE
)

# Add a custom legend
legend(
  "topright", 
  legend = epithet_levels, 
  col    = colors, 
  pch    = shape_values, 
  cex    = 0.6, 
  ncol   = 2
)

Add marks

Add Distance, Dist Water for marks.

length_max <- length(dist)

# Distance -- 984 data points, no missing records.
dist <- nndist(ppp_bombus)

# Dist Water -- 984 data points, no missing records.
dist_water <- data_covariates$Dist_Water[ppp_bombus]

# Add marks.
marks(ppp_bombus)$Dist <- dist
marks(ppp_bombus)$DistWater <- dist_water

head(marks(ppp_bombus))

Plot

# Plot Distance
plot(ppp_bombus, which.marks = "Dist", main = "Bumble Bee Distance")

# Plot Elevation.
plot(data_covariates$Elevation, main = "Elevation")
points(ppp_bombus$x, ppp_bombus$y,
     pch = 16,
     cex = 0.6,
     col = "black"
     );par(new=TRUE)

points(ppp_bombus$x, ppp_bombus$y,
     pch = 16,
     cex = 0.5,
     col = "yellow"
     )

# Plot Forest.
plot(data_covariates$Forest, main = "Forest")
points(ppp_bombus$x, ppp_bombus$y,
     pch = 16,
     cex = 0.6,
     col = "yellow"
     );par(new=TRUE)

points(ppp_bombus$x, ppp_bombus$y,
     pch = 16,
     cex = 0.5,
     col = "black"
     )

# Plot HFI
plot(data_covariates$HFI, main = "HFI")
points(ppp_bombus$x, ppp_bombus$y,
     pch = 16,
     cex = 0.6,
     col = "black"
     );par(new=TRUE)

points(ppp_bombus$x, ppp_bombus$y,
     pch = 16,
     cex = 0.5,
     col = "yellow"
     )

# Plot Dist_water
plot(data_covariates$Dist_Water, main = "Dist Water")
points(ppp_bombus$x, ppp_bombus$y,
     pch = 16,
     cex = 0.6,
     col = "black"
     );par(new=TRUE)

points(ppp_bombus$x, ppp_bombus$y,
     pch = 16,
     cex = 0.5,
     col = "yellow"
     )

Park

Plot Park vs. Bumble bee occurrences.

# Plot the point pattern & assign to variable for specific information
ppp_parks_plot <- plot(ppp_parks,
                       main = "Parks VS. Bumble Bee Occurrences", 
                       col = "grey90", 
                       cols = brewer.pal(n = 5, name = "Set2"), 
                       pch = c(15, 19, 18, 17, 20), 
                       cex = 0.8, 
                       legend = FALSE
)

# Add a custom legend with a title 'Region'
legend("topright", 
       legend = c("North", "Ok", "South", "Tc", "West", "Bumble Bee"), 
       title = "Marks", 
       col = c(brewer.pal(n = 5, name = "Set2"), "yellow"), 
       pch = c(15, 19, 18, 17, 16, 20), 
       cex = 0.8
)

# Plot the bumble bee data points
plot(ppp_bombus, add = TRUE, cols = "black", pch = 20, cex = 0.6)
## Plotting the first column of marks
plot(ppp_bombus, add = TRUE, cols = "yellow", pch = 20, cex = 0.3)
## Plotting the first column of marks

Save ppp_bombus object

save(ppp_bombus, file = "../data/ppp_bombus.Rda")